library(GSVA)
library(openxlsx)
library(tidyverse)
library(Biobase)
library(ComplexHeatmap)
library(circlize)
library(hypeR)
PATH <- file.path(Sys.getenv("MLAB"), "projects/brcameta/exosome/4t1_brca")
TCGAPATH <- file.path(Sys.getenv("CBM"),"TCGA-GDC/")
METABRICPATH <- file.path(Sys.getenv("CBM"), "METABRIC/")
DPATH <- file.path(Sys.getenv("CBM"),"otherStudies/RNAseq/2023-03-22-YuhanExosomeBrCa")
do_save <- FALSE
Loading Data
tcga_brca <- readRDS(file.path(TCGAPATH,"/esets_filtered/TCGA-BRCA_2020-03-22_DESeq2_log_filtered_eset.rds"))
metabric <- readRDS(file.path(METABRICPATH, "ESets/metabric_GE_ESet.rds"))
metabric_impute <- readRDS(file.path(METABRICPATH, "ESets/metabric_GE_ESet_impute.rds"))
#Signatures
## Obesity signature from Fuentes-Mattei et al., JNCI 2014, Table S3.
obDiff <- read.xlsx( file.path(PATH,"data/JNCI2014_SupplementaryTable3_ObeseSignatureBrCa.xlsx") )
colnames(obDiff)[match(c("Gene.Title.(Definition)","Log.ratio.(Log10)","p-value*"),colnames(obDiff))] <-
c("Gene.Title","Log10.ratio","pvalue")
obSig <- list(
ob.up=dplyr::filter(obDiff,Log10.ratio>0) %>% select(Gene.Symbol) %>% drop_na() %>% distinct() %>% pull(),
ob.dn=dplyr::filter(obDiff,Log10.ratio<0) %>% select(Gene.Symbol) %>% drop_na() %>% distinct() %>% pull())
diabetes_sigs <- readRDS(file.path(PATH, "data/diabetes_sigs.rds")) %>% unlist(use.names=TRUE, recursive = FALSE)
## Hallmark Signatures
hallmark_genesets <- msigdb_download(species="Homo sapiens", category="H")
hallmark_emt <- hallmark_genesets["HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"]
## Signatures from RNA-Seq experiments
all_sigs <- readRDS(file.path(PATH,"data/signatures_symbol.rds"))
all_sigs <- all_sigs[lapply(all_sigs, length) > 1]
all_sigs_human <- lapply(all_sigs, babelgene::orthologs, species="mouse", human=FALSE)
all_sigs_human <- lapply(all_sigs_human, function (x) x %>% dplyr::pull(human_symbol))
all_sigs_human <- c(all_sigs_human, obSig, diabetes_sigs, hallmark_emt)
GSVA
# GSVA
if (do_save) {
#subtype_filter <- is.na(tcga_brca$subtype_selected)
#tcga_brca <- tcga_brca[,!subtype_filter]
#tcga_brca <- tcga_brca[,tcga_brca$subtype_selected == "BRCA.LumA"]
#metabric <- metabric[,metabric$Pam50_SUBTYPE == "LumA"]
tcga_gsva <- gsva(tcga_brca, all_sigs_human, mx.diff=TRUE, verbose=FALSE)
metabric_gsva <- gsva(metabric, all_sigs_human, mx.diff=TRUE, verbose=FALSE)
saveRDS(metabric_gsva, file.path(PATH, "data/metabric_gsva_res_obs.rds"))
saveRDS(tcga_gsva, file.path(PATH, "data/tcga_gsva_res_obs.rds"))
} else {
tcga_gsva <- readRDS(file.path(PATH, "data/tcga_gsva_res_obs.rds"))
metabric_gsva <- readRDS(file.path(PATH, "data/metabric_gsva_res_obs.rds"))
}
# Subtype filtering
metabric_gsva <- metabric_gsva[,metabric_gsva$Pam50_SUBTYPE != "NC"]
subtype_filter <- is.na(tcga_gsva$subtype_selected)
tcga_gsva <- tcga_gsva[,!subtype_filter]
pData(tcga_gsva) <- pData(tcga_gsva) %>% mutate(pam50subtype = str_split(subtype_selected, pattern="BRCA.", simplify=TRUE)[,2])
tcga_gsva$ir_c <- t(exprs(tcga_gsva["ir_c_up",]) - exprs(tcga_gsva["ir_c_down"]))
tcga_gsva$tgfb_c <- t(exprs(tcga_gsva["tgfb_c_up",])-exprs(tcga_gsva["tgfb_c_down",]))
tcga_gsva$ir_is <- t(exprs(tcga_gsva["ir_is_up",])-exprs(tcga_gsva["ir_is_down",]))
tcga_gsva$tgfb_is <- t(exprs(tcga_gsva["tgfb_is_up",])-exprs(tcga_gsva["tgfb_is_down",]))
tcga_gsva$ob <- t(exprs(tcga_gsva["ob.up",])-exprs(tcga_gsva["ob.dn",]))
metabric_gsva$ir_c <- t(exprs(metabric_gsva["ir_c_up",]) - exprs(metabric_gsva["ir_c_down"]))
metabric_gsva$tgfb_c <- t(exprs(metabric_gsva["tgfb_c_up",])-exprs(metabric_gsva["tgfb_c_down",]))
metabric_gsva$ir_is <- t(exprs(metabric_gsva["ir_is_up",])-exprs(metabric_gsva["ir_is_down",]))
metabric_gsva$tgfb_is <- t(exprs(metabric_gsva["tgfb_is_up",])-exprs(metabric_gsva["tgfb_is_down",]))
metabric_gsva$ob <- t(exprs(metabric_gsva["ob.up",])-exprs(metabric_gsva["ob.dn",]))
Boxplots
col.pam50 <- c(LumA="pink",LumB="red",Her2="yellow",Basal="darkgray",Normal="white")
for (gset in c("ir_c", "ir_is", "tgfb_c", "ob")) {
par(mfrow=c(1,2))
boxplot(pData(tcga_gsva)[,gset] ~ tcga_gsva$pam50subtype, col=col.pam50, las=2,
xlab="pam50",ylab=gset,main="TCGA" )
boxplot(pData(metabric_gsva)[,gset] ~ metabric_gsva$Pam50_SUBTYPE, col=col.pam50, las=2,
xlab="pam50",ylab=gset,main="METABRIC" )
}




Correlation with Obesity
TCGA
sample_col <- hcl.colors(n=nlevels(tcga_gsva$pam50subtype %>% factor))
names(sample_col) <- levels(tcga_gsva$pam50subtype %>% factor)
ha.genes <- HeatmapAnnotation(sample_type=tcga_gsva$pam50subtype,
col=list(sample_type=sample_col))
png(file=file.path(PATH, "results/GSVA_tcga_obs_Heatmap.png"))
gsva_heatmap <- Heatmap(t(scale(t(exprs(tcga_gsva)))),
name="Enrichment Score",
col=colorRamp2(c(-3, 0, 3), c("#072448", "white", "#ff6150")),
top_annotation=ha.genes,
cluster_rows=TRUE,
cluster_columns=TRUE,
clustering_distance_rows="euclidean",
clustering_method_rows="ward.D",
clustering_distance_columns="euclidean",
clustering_method_columns="ward.D",
show_parent_dend_line=TRUE,
column_title = "samples",
column_dend_height = unit(5, "mm"),
row_title="Genesets",
row_dend_width = unit(5, "mm"),
show_column_names=FALSE,
show_row_names=TRUE)
draw(gsva_heatmap)
dev.off()
## png
## 2
gsva_heatmap

Corplot between Obesity/Diabetes signatures
library(ggcorrplot)
gs_cor <- cor(t(exprs(tcga_gsva)))
ob_cor <- gs_cor[grepl("ob|dz",rownames(gs_cor)),grepl("ob|dz",colnames(gs_cor))]
ggcorrplot(ob_cor, hc.order = FALSE, type = "lower",
outline.col = "white",
ggtheme = ggplot2::theme_gray,
lab=TRUE,
lab_size=3,
colors = c("#6D9EC1", "white", "#E46726"))

Exosome Signatures & Diabetes/Obesity Signatures
cor_df <- as.data.frame(as.table(gs_cor))
colnames(cor_df) <- c("GS1", "GS2", "cor")
cor_df$GS1 <- str_replace(cor_df$GS1, pattern="dn", replacement = "down")
cor_df$GS2 <- str_replace(cor_df$GS2, pattern="dn", replacement = "down")
cor_df %>%
filter(grepl("ir_c|is_c|ir_is|tgfb_c|tgfb_ir", GS1)) %>%
filter(grepl("dz|ob", GS2)) %>%
separate(col = GS2, into = c("GS2", "direction"), sep = "\\.") %>%
ggplot() +
geom_boxplot(aes(x=GS1, y=cor, fill=direction)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
labs(title = "Correlation between DE signatures and diabetes/obesity signatures")

Up-Down
tcga_updown_cor <- cor(pData(tcga_gsva)[,c("ir_c", "ir_is", "tgfb_c", "tgfb_is", "ob")])
ggcorrplot(tcga_updown_cor, hc.order = FALSE, type = "lower",
outline.col = "white",
ggtheme = ggplot2::theme_gray,
lab=TRUE,
lab_size=3,
colors = c("#6D9EC1", "white", "#E46726"))

Metabric
sample_col <- hcl.colors(n=nlevels(metabric_gsva$Pam50_SUBTYPE %>% factor))
names(sample_col) <- levels(metabric_gsva$Pam50_SUBTYPE %>% factor)
ha.genes <- HeatmapAnnotation(sample_type=metabric_gsva$Pam50_SUBTYPE,
col=list(sample_type=sample_col))
png(file=file.path(PATH, "results/GSVA_metabric_obs_Heatmap.png"))
gsva_heatmap <- Heatmap(t(scale(t(exprs(metabric_gsva)))),
name="Enrichment Score",
col=colorRamp2(c(-3, 0, 3), c("#072448", "white", "#ff6150")),
top_annotation=ha.genes,
cluster_rows=TRUE,
cluster_columns=TRUE,
clustering_distance_rows="euclidean",
clustering_method_rows="ward.D",
clustering_distance_columns="euclidean",
clustering_method_columns="ward.D",
show_parent_dend_line=TRUE,
column_title = "samples",
column_dend_height = unit(5, "mm"),
row_title="Genesets",
row_dend_width = unit(5, "mm"),
show_column_names=FALSE,
show_row_names=TRUE)
draw(gsva_heatmap)
dev.off()
## png
## 2
gsva_heatmap

Corplot between Obesity/Diabetes signatures
gs_cor <- cor(t(exprs(metabric_gsva)))
ob_cor <- gs_cor[grepl("ob|dz",rownames(gs_cor)),grepl("ob|dz",colnames(gs_cor))]
ggcorrplot(ob_cor, hc.order = FALSE, type = "lower",
outline.col = "white",
ggtheme = ggplot2::theme_gray,
lab=TRUE,
lab_size=3,
colors = c("#6D9EC1", "white", "#E46726"))

Exosome Signatures & Diabetes/Obesity Signatures
cor_df <- as.data.frame(as.table(gs_cor))
colnames(cor_df) <- c("GS1", "GS2", "cor")
cor_df$GS1 <- str_replace(cor_df$GS1, pattern="dn", replacement = "down")
cor_df$GS2 <- str_replace(cor_df$GS2, pattern="dn", replacement = "down")
cor_df %>%
filter(grepl("ir_c|is_c|ir_is|tgfb_c|tgfb_ir", GS1)) %>%
filter(grepl("dz|ob", GS2)) %>%
separate(col = GS2, into = c("GS2", "direction"), sep = "\\.") %>%
ggplot() +
geom_boxplot(aes(x=GS1, y=cor, fill=direction)) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

Up-Down
metabric_updown_cor <- cor(pData(metabric_gsva)[,c("ir_c", "ir_is", "tgfb_c", "tgfb_is", "ob")])
ggcorrplot(metabric_updown_cor, hc.order = FALSE, type = "lower",
outline.col = "white",
ggtheme = ggplot2::theme_gray,
lab=TRUE,
lab_size=3,
colors = c("#6D9EC1", "white", "#E46726"))

KS-based enrichment
## Add gene symbols to DESeq tables
dat <- readRDS(file.path(PATH, "data/4T1_mets_sExp.rds"))
deseq_list <- readRDS(file.path(PATH,"data/deseq_list.rds"))
hallmark_genesets <- msigdb_download(species="Mus musculus", category="H")
hallmark_genesets$OBESITY <- obSig
deseq_list <- lapply(deseq_list, function(Z) {
as.data.frame(Z) %>%
tibble::rownames_to_column(var="ensembl_id") %>%
dplyr::mutate(geneID=unlist(rowData(dat)[ensembl_id,"mgi_symbol"]))
})
## rank by up-regulation
rank_signatures_up <- lapply(
deseq_list, function(sig) sig %>%
dplyr::filter(geneID!="") %>%
dplyr::arrange(desc(log2FoldChange)) %>%
dplyr::select(geneID,log2FoldChange) %>%
tibble::deframe())
names(rank_signatures_up) <- paste(names(rank_signatures_up),"up",sep="_")
## rank by down-regulation
rank_signatures_dn <- lapply(
deseq_list,function(sig) sig %>%
dplyr::filter(geneID!="") %>%
dplyr::arrange(log2FoldChange) %>%
dplyr::select(geneID,log2FoldChange) %>%
tibble::deframe())
names(rank_signatures_dn) <- paste(names(rank_signatures_dn),"dn",sep="_")
rank_signatures <- c(rank_signatures_up,rank_signatures_dn)
rank_signatures <- rank_signatures[order(names(rank_signatures),decreasing = TRUE)]
Hallmarks + Pamm
#all_genesets <- c(hallmark_genesets, pamm_genelists)
max_fdr <- 0.05
ks_hall <- hypeR::hypeR(signature=rank_signatures,genesets=hallmark_genesets,test="ks",fdr=max_fdr,plotting=TRUE)
hyp_dots(ks_hall,merge=TRUE,fdr=max_fdr/5,top=25) + ggtitle(paste("FDR ≤", max_fdr/5))

#hypeR::rctbl_build(ks_hall, show_hmaps=FALSE)
IS C UP
print(ks_hall$data$is_c_up$plots[ks_hall$data$is_c_up$data$fdr<=0.01])
## $HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION

##
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

##
## $HALLMARK_KRAS_SIGNALING_DN

##
## $HALLMARK_XENOBIOTIC_METABOLISM

##
## $HALLMARK_TNFA_SIGNALING_VIA_NFKB

##
## $HALLMARK_BILE_ACID_METABOLISM

##
## $HALLMARK_INFLAMMATORY_RESPONSE

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

##
## $HALLMARK_MYOGENESIS

IS C Down
print(ks_hall$data$is_c_dn$plots[ks_hall$data$is_c_dn$data$fdr<=0.01])
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_E2F_TARGETS

##
## $HALLMARK_G2M_CHECKPOINT

##
## $HALLMARK_CHOLESTEROL_HOMEOSTASIS

##
## $HALLMARK_MYC_TARGETS_V2

##
## $HALLMARK_MTORC1_SIGNALING

##
## $HALLMARK_MITOTIC_SPINDLE

##
## $HALLMARK_UNFOLDED_PROTEIN_RESPONSE

IR IS Up
print(ks_hall$data$ir_is_up$plots[ks_hall$data$ir_is_up$data$fdr<=0.01])
## $HALLMARK_G2M_CHECKPOINT

##
## $HALLMARK_E2F_TARGETS

##
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_MITOTIC_SPINDLE

IR IS Down
print(ks_hall$data$ir_is_dn$plots[ks_hall$data$ir_is_dn$data$fdr<=0.01])
## $HALLMARK_MYOGENESIS

##
## $HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

##
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

##
## $HALLMARK_ADIPOGENESIS

IR C Up
print(ks_hall$data$ir_c_up$plots[ks_hall$data$ir_c_up$data$fdr<=0.01])
## $HALLMARK_XENOBIOTIC_METABOLISM

##
## $HALLMARK_TNFA_SIGNALING_VIA_NFKB

##
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

IR C Down
print(ks_hall$data$ir_c_dn$plots[ks_hall$data$ir_c_dn$data$fdr<=0.01])
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_MYC_TARGETS_V2

##
## $HALLMARK_MTORC1_SIGNALING

##
## $HALLMARK_CHOLESTEROL_HOMEOSTASIS

##
## $HALLMARK_E2F_TARGETS

##
## $HALLMARK_UNFOLDED_PROTEIN_RESPONSE

##
## $HALLMARK_G2M_CHECKPOINT

##
## $HALLMARK_MYOGENESIS

##
## $HALLMARK_TNFA_SIGNALING_VIA_NFKB
